Using positions from single dimensions as clusters, perform scRNAseq cluster analysis to examine gene differential expression between positional clusters and dimensional reduction for positional samples. Following: https://bioconductor.org/packages/release/bioc/vignettes/SC3/inst/doc/SC3.html#de-genes
library(raster)
library(SC3)
library(SingleCellExperiment)
library(scater)
library(plotly)
library(cowplot)
library(knitr)
library(patchwork)
library(tidyverse)
allchemo_tpm <- read_csv("~/Desktop/obmap/r_analysis/3dimOB/input/covarintactchemo_over_samples_200923.csv") %>% select(-X1) %>% filter(name != "M14S23")
info <- read_csv("~/Desktop/obmap/r_analysis/3dimOB/input/info_201129.csv") %>%
rename("olfrname" = "gene")
set.seed(711)
allchemo_dim <- allchemo_tpm %>% filter(dim == "AntPos")
sample_names <- allchemo_dim$name
covars <- allchemo_dim %>% select(name:dimrep)
allchemo_trim <- allchemo_dim %>% select(-name, -rep, -slice, -dim, -dimrep) %>% t()
colnames(allchemo_trim) <- sample_names
allchemo_trim[1:5,1:5]
## M10S01 M10S02 M10S03 M10S04 M10S05
## Olfr299 0.00 277.35 0.00 0.00 0.00
## Olfr109 0.00 0.00 99.66 0.00 0.00
## Olfr281 0.33 602.80 2451.54 1882.25 2612.72
## Olfr1015 5.77 228.19 0.11 340.79 0.00
## Olfr1347 0.00 0.30 0.00 0.00 0.00
allchemo_coldata <- covars %>% select(name, slice)
coldata_ac <- data.frame("cell_type1" = allchemo_coldata$slice)
rownames(coldata_ac) <- allchemo_coldata$name
sce <- SingleCellExperiment(
assays = list(
counts = as.matrix(allchemo_trim),
logcounts = log2(as.matrix(allchemo_trim) + 1)
),
colData = coldata_ac
)
rowData(sce)$feature_symbol <- rownames(sce)
sce_pca <- runPCA(sce)
plotPCA(sce_pca, colour_by = "cell_type1")
sce_tsne <- runTSNE(sce)
plotTSNE(sce_tsne, colour_by = "cell_type1")
sce_umap <- runUMAP(sce)
plotUMAP(sce_umap, colour_by = "cell_type1")
umap_rd <- reducedDim(sce_umap) %>% as_tibble()
## Warning: The `x` argument of `as_tibble.matrix()` must have unique column names if `.name_repair` is omitted as of tibble 2.0.0.
## Using compatibility `.name_repair`.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
umap_tibble <- tibble(name = rownames(reducedDim(sce_umap)),
UMAP_1 = umap_rd$V1, UMAP_2 = umap_rd$V2) %>%
left_join(covars, by = "name") %>%
mutate(slice_fct = as_factor(slice))
umapplot <- ggplot(umap_tibble) +
geom_point(aes(UMAP_1, UMAP_2, fill = slice),
size = 6, pch = 21, color = "black") +
scale_shape_manual(values=c(0, 1, 2)) +
scale_fill_viridis_c() +
theme_cowplot() +
ggtitle("UMAP of OR Gene Expression",
subtitle = paste0(as.character(dim(allchemo_trim)[2]),
"VD sections from 3 mice")) +
theme(legend.position = "none") +
geom_vline(xintercept = 0, linetype = "dashed", color = "red") +
geom_hline(yintercept = 0, linetype = "dashed", color = "red")
umapplot
#ggsave(umapplot, filename = "~/Desktop/obmap/r_analysis/sc_style/output/ap_umap.png", device = "png")
sce_umapcomp <- runUMAP(sce, ncomponents = 5)
plotUMAP(sce_umapcomp, ncomponents = 5, colour_by = "cell_type1")
set.seed(711)
alldim <- allchemo_tpm %>% filter(rep != 16)
ad_sample_names <- alldim$name
ad_covars <- alldim %>% select(name:dimrep)
ad_trim <- alldim %>% select(-name, -rep, -slice, -dim, -dimrep) %>% t()
colnames(ad_trim) <- ad_sample_names
ad_trim[1:5,1:5]
## M10S01 M10S02 M10S03 M10S04 M10S05
## Olfr299 0.00 277.35 0.00 0.00 0.00
## Olfr109 0.00 0.00 99.66 0.00 0.00
## Olfr281 0.33 602.80 2451.54 1882.25 2612.72
## Olfr1015 5.77 228.19 0.11 340.79 0.00
## Olfr1347 0.00 0.30 0.00 0.00 0.00
adcoldata <- ad_covars %>% select(name, slice)
ad_coldata_ac <- data.frame("cell_type1" = adcoldata$slice)
rownames(ad_coldata_ac) <- adcoldata$name
ad_sce <- SingleCellExperiment(
assays = list(
counts = as.matrix(ad_trim),
logcounts = log2(as.matrix(ad_trim) + 1)
),
colData = ad_coldata_ac
)
rowData(ad_sce)$feature_symbol <- rownames(ad_sce)
ad_umap <- runUMAP(ad_sce)
ad_umap_rd <- reducedDim(ad_umap) %>% as_tibble()
ad_tibble <- tibble(name = rownames(reducedDim(ad_umap)),
UMAP_1 = ad_umap_rd$V1, UMAP_2 = ad_umap_rd$V2) %>%
left_join(ad_covars, by = "name") %>%
mutate(slice_fct = as_factor(slice),
dimrep_fct = as_factor(dimrep),
index = 1:n())
ad_umap_plot <- ggplot(ad_tibble) +
geom_point(aes(UMAP_1, UMAP_2, fill = slice_fct),
size = 6, pch = 21, color = "black") +
scale_fill_viridis_d() +
theme_cowplot() +
ggtitle("UMAP of OR Gene Expression",
subtitle = paste0(as.character(dim(ad_trim)[2]),
" alldim sections from 11 mice")) +
theme(legend.position = "none") +
geom_vline(xintercept = 0, linetype = "dashed", color = "red") +
geom_hline(yintercept = 0, linetype = "dashed", color = "red")
#hiro doesnt like the simple all dim plot colored by slice, make his request
hmt1 <- ad_tibble %>% mutate(facet = "AntPos",
coloredin = ifelse(dimrep == 1, "B", "A"),
transp = ifelse(dimrep == 1, 1, 0.9))
hmt2 <- ad_tibble %>% mutate(facet = "MedLat",
coloredin = ifelse(dimrep == 2, "B", "A"),
transp = ifelse(dimrep == 2, 1, 0.9))
hmt3 <- ad_tibble %>% mutate(facet = "VenDor",
coloredin = ifelse(dimrep == 3, "B", "A"),
transp = ifelse(dimrep == 3, 1, 0.9))
hm12 <- bind_rows(hmt1, hmt2)
hm123 <- bind_rows(hm12, hmt3)
hm_umap_plot <- ggplot(hm123) +
geom_point(aes(UMAP_1, UMAP_2, fill = coloredin, alpha = transp),
size = 6, pch = 21, color = "black") +
scale_fill_manual(values = c("white", "red")) +
theme_cowplot() +
ggtitle("UMAP of OR Gene Expression",
subtitle = paste0(as.character(dim(ad_trim)[2]),
" alldim sections from 12 mice")) +
theme(legend.position = "none") +
geom_vline(xintercept = 0, linetype = "dashed", color = "red") +
geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
facet_wrap(~ facet)
ad_facets <- ggplot(ad_tibble) +
geom_point(aes(UMAP_1, UMAP_2, fill = slice),
size = 6, pch = 21, color = "black") +
facet_wrap(~ dim) +
scale_fill_viridis_c() +
theme_cowplot() +
geom_vline(xintercept = 0, linetype = "dashed", color = "red") +
geom_hline(yintercept = 0, linetype = "dashed", color = "red")
alldimplots <- ad_umap_plot / ad_facets
hmplots <- hm_umap_plot / ad_facets
alldimplots
hmplots
#ggsave(alldimplots, filename = "~/Desktop/obmap/r_analysis/sc_style/alldim_umap.png", device = "png")
ad_xy <- cbind(ad_tibble$UMAP_1, ad_tibble$UMAP_2)
distances <- pointDistance(ad_xy, lonlat = F)
distances[which(distances == 0)] <- NA
#check that no points have identical coordinates
sum(is.na(distances)) == nrow(distances)
## [1] TRUE
bestdist <- vector(mode = "numeric", length = nrow(distances))
for (i in 1:nrow(distances)) {
bestdist[i] <- which(distances[i,] == min(distances[i,], na.rm=T))
if (i == 1) {
best_tibble <- ad_tibble[bestdist[i],]
} else if (i == nrow(distances)) {
newline <- ad_tibble[bestdist[i],]
best_tibble <- bind_rows(best_tibble, newline) %>% rename(bname = name,
bUMAP_1 = UMAP_1,
bUMAP_2 = UMAP_2,
brep = rep,
bslice = slice,
bdim = dim,
bdimrep = dimrep,
bslice_fct = slice_fct,
bdimrep_fct = dimrep_fct,
bindex = index)
} else {
newline <- ad_tibble[bestdist[i],]
best_tibble <- bind_rows(best_tibble, newline)
}
}
best_friends <- bind_cols(ad_tibble, best_tibble) %>%
mutate(same_dim = ifelse(dim == bdim, T, F),
same_rep = ifelse(rep == brep, T, F),
slice_diff = abs(slice - bslice),
dimdim = paste(dim, bdim, sep = "_"),
dist = sqrt((UMAP_1 - bUMAP_1)^2) + (UMAP_2 - bUMAP_2)^2)
ggplot(best_friends) +
geom_histogram(aes(dist)) +
geom_vline(xintercept = mean(best_friends$dist),
color = "red", linetype = 2) +
geom_vline(xintercept = median(best_friends$dist),
color = "blue", linetype = 2) +
ggtitle("Histogram of distance between closest points",
subtitle = "Median is blue, Mean is red")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ggplot(best_friends) +
geom_bar(aes(same_dim)) +
ggtitle("Neighbors are typically from same dimension")
ggplot(best_friends %>% filter(same_dim == T)) +
geom_bar(aes(same_rep)) +
ggtitle("Neighbors from same dim are typically not from same replicate")
ggplot(best_friends %>% filter(same_dim == T) %>% filter(same_rep == F)) +
geom_jitter(aes(slice, bslice)) +
ggtitle("Neighbors from same dim but diff rep, are typically from similar position")
#what about closest points that are not from same dim?
ggplot(best_friends %>%
filter(same_dim == F) %>%
group_by(dimdim) %>%
summarise(count = n()) %>%
arrange(desc(count)) %>%
mutate(dimdimfct = as_factor(dimdim))) +
geom_bar(aes(dimdimfct, count), stat= "identity") +
ggtitle("Neighbors from different dims are typically DV/ML hybrids")
## `summarise()` ungrouping output (override with `.groups` argument)
ad_tibble <- tibble(name = rownames(reducedDim(ad_umap)),
UMAP_1 = ad_umap_rd$V1, UMAP_2 = ad_umap_rd$V2) %>%
left_join(ad_covars, by = "name") %>%
mutate(slice_fct = as_factor(slice),
dimrep_fct = as_factor(dimrep),
numlab = ifelse(slice == 1,
1, ifelse(slice == 5,
5, ifelse(slice == 10,
10, ifelse(slice == 15,
15, ifelse(slice == 20,
20, ifelse(slice == 23,
23, NA)))))))
ad_umap_plot <- ggplot(ad_tibble) +
geom_point(aes(UMAP_1, UMAP_2, fill = dimrep_fct),
size = 6, pch = 21, color = "black") +
geom_label_repel(aes(UMAP_1, UMAP_2, label = numlab)) +
scale_fill_viridis_d() +
theme_cowplot() +
ggtitle("UMAP of OR Gene Expression",
subtitle = paste0(as.character(dim(ad_trim)[2]),
" AP sections from 6 mice")) +
theme(legend.position = "none") +
geom_vline(xintercept = 0, linetype = "dashed", color = "red") +
geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
facet_wrap(~ dimrep)
Very ordered set of points reflects the distinct nature of OR positions along the AP axis with the 2 glomeruli positioned in approximately located sections.
include_graphics("AP_umap.png")
Lack of order is expected as the 2 glomeruli per OR can be located in very far apart or very close sections based on distance to mirror symmetry line that separates the ML dimension.
include_graphics("ML_umap.png")
Order is expected as both glomeruli should be located in the same VD section since OB position is related to OE position
include_graphics("VD_umap.png")
Interactive is very good, but keeping plot code for output
sce_sc3 <- sc3(sce, ks = 3:10, biology = TRUE)
## Setting SC3 parameters...
## Warning: 'isSpike' is deprecated.
## See help("Deprecated")
## Calculating distances between the cells...
## Performing transformations and calculating eigenvectors...
## Performing k-means clustering...
## Calculating consensus matrix...
## Calculating biology...
#sc3_interactive(sce_sc3)
sc3_export_results_xls(sce_sc3, filename = "~/Desktop/obmap/r_analysis/sc_style/output/ap_sc3.xls")
plotPCA(
sce_sc3,
colour_by = "sc3_5_clusters",
size_by = "sc3_5_log2_outlier_score")
## Warning: call 'runPCA' explicitly to compute results
sc3_plot_consensus(
sce_sc3, k = 5,
show_pdata = c(
"cell_type1",
"log10_total_features",
"sc3_5_clusters",
"sc3_5_log2_outlier_score"))
## Provided columns 'log10_total_features' do not exist in the phenoData table!
sc3_plot_silhouette(sce_sc3, k = 5)
sc3_plot_expression(sce_sc3, k = 5)
sc3_plot_cluster_stability(sce_sc3, k = 5)
## Warning: Use of `d$Cluster` is discouraged. Use `Cluster` instead.
## Warning: Use of `d$Stability` is discouraged. Use `Stability` instead.
sc3_plot_de_genes(
sce_sc3, k = 5,
show_pdata = c(
"cell_type1",
"log10_total_features",
"sc3_5_clusters",
"sc3_5_log2_outlier_score"))
## Provided columns 'log10_total_features' do not exist in the phenoData table!
sc3_plot_markers(
sce_sc3, k = 5,
show_pdata = c(
"cell_type1",
"log10_total_features",
"sc3_3_clusters",
"sc3_3_log2_outlier_score"))
## Provided columns 'log10_total_features' do not exist in the phenoData table!
or_dim <- allchemo_tpm %>% select(starts_with("Olfr"))
or_names <- colnames(or_dim)
or_trim <- or_dim %>% as.matrix()
colnames(or_trim) <- or_names
or_trim[1:5,1:5]
## Olfr299 Olfr109 Olfr281 Olfr1015 Olfr1347
## [1,] 0.00 0.00 0.33 5.77 0.0
## [2,] 277.35 0.00 602.80 228.19 0.3
## [3,] 0.00 99.66 2451.54 0.11 0.0
## [4,] 0.00 0.00 1882.25 340.79 0.0
## [5,] 0.00 0.00 2612.72 0.00 0.0
coldata_or <- data.frame("cell_type1" = c(1:length(or_names)))
rownames(coldata_or) <- or_names
or_sce <- SingleCellExperiment(
assays = list(
counts = as.matrix(or_trim),
logcounts = log2(as.matrix(or_trim) + 1)
),
colData = coldata_or
)
rowData(or_sce)$feature_symbol <- rownames(or_sce)
#UMAP
or_umap <- runUMAP(or_sce)
plotUMAP(or_umap, colour_by = "cell_type1")
lancet <- read_csv("~/Downloads/lancet_mouseORnames.csv")
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## Symbol = col_character(),
## Pseudo = col_double(),
## Olfrname = col_character(),
## alias = col_character()
## )
or_rd <- reducedDim(or_umap) %>% as_tibble()
or_umap_tibble <- tibble(Olfrname = rownames(reducedDim(or_umap)),
UMAP_1 = or_rd$V1, UMAP_2 = or_rd$V2) %>%
left_join(lancet, by = "Olfrname") %>%
rename(olfrname = Olfrname) %>%
left_join(info, by = "olfrname") %>%
select(olfrname:class, fisurface, tan_zone) %>%
mutate(symfam = as.numeric(str_extract(Symbol, "[0-9]+")),
orsort = 1:n()) %>%
arrange(class) %>%
mutate(csort = 1:n()) %>%
arrange(symfam) %>%
mutate(sfsort = 1:n()) %>%
arrange(orsort) %>%
filter(!is.na(class)) %>%
filter(!is.na(symfam))
ggplot(or_umap_tibble) +
geom_point(aes(UMAP_1, UMAP_2, fill = as_factor(symfam)),
size = 2, pch = 21, color = "black") +
scale_fill_viridis_d() +
theme_cowplot() +
ggtitle("ORs by family (color)",
subtitle = "Is that a fish?") +
geom_vline(xintercept = 0, linetype = "dashed", color = "red") +
geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
theme(legend.position = "none")